* Clean SNAP policy, federal SSI, and state covariate inputs; then read raw IPUMS data and merge them in
* 2/2/2025

// Start from a clean session. -clear all- also drops previously defined programs,
// so the -program- definitions below can be re-run without "already defined" errors.
// NOTE(review): the globals $raw, $intermediate and $for_analysis are used below but
// are not defined in this file -- presumably set by a master do-file; confirm.
clear all
// Never pause output with a --more-- prompt
set more off

// Entry point: builds every input file first, then reads the IPUMS extract and
// merges those files in. Order matters: read_ipums_data uses the files saved by
// the three programs called before it.
program main
	clean_policy_database // saves $for_analysis/snap_policies and snap_policies_jones_et_al
	code_federal_ssi_amt // saves $intermediate/federal_SSI_0016
	clean_state_covariates // saves $intermediate/state_unemp_0016, state_political, state_income
	read_ipums_data // saves $intermediate/states_raw_0016 and sc_raw_0016
end

program clean_policy_database 
	// Purpose: derive, for each state, the year in which each SNAP policy switched on
	// (or, for fingerprinting, switched off), and save
	//   $for_analysis/snap_policies_jones_et_al  (state-year level, all policies)
	//   $for_analysis/snap_policies              (state level, adoption years only)
	//
	// SNAP policy database from USDA: https://www.ers.usda.gov/data-products/snap-policy-data-sets
	// downloaded the 2/29/2024 version on May 17, 2025
	import excel using "$raw/SNAPPolicyDatabase_2024.xlsx", clear firstrow sheet (SNAP Policy Database)
	tostring yearmonth, replace
	gen year = substr(yearmonth, 1, 4)
	destring year, replace
	
	// yearmonth is kept so that rows can be ordered chronologically within a
	// state-year; it is assumed to be a fixed-width YYYYMM string (the substr()
	// above already relies on the year being its first four characters)
	keep state_fips statename year yearmonth bbce* call face* fingerprint oapp reportsimple cap ///
		vehexclall vehexclone nonciteldfull certeld0103 certeld0406 outreach
	gen vehexcl = (vehexclone == 1 | vehexclall == 1)
	gen certeld0106 = certeld0103 + certeld0406
	drop bbce_inclmt bbce_a_amt bbce_a_veh
	ren state_fips statefip
	
	replace oapp = 1 if oapp == 2   // code 2: policy applies to only some parts of the state
	replace fingerprint = 1 if fingerprint == 2
	
	// Adoption year = latest year in which the policy flag goes from 0 to 1.
	// Rows must be in chronological order for the [_n-1] comparison to be meaningful:
	// -sort statefip year- alone leaves the months of a year in random order (Stata
	// breaks ties randomly), which made the detected transitions non-reproducible.
	// Under -by statefip-, [_n-1] never crosses a state boundary, so no explicit
	// same-state check is needed.
	foreach var of varlist bbce call faceini facerec oapp reportsimple cap ///
		vehexcl nonciteldfull {
			bys statefip (year yearmonth): gen `var'_start = year ///
				if `var' == 1 & `var'[_n-1] == 0
			bys statefip: egen `var'_startyear = max(`var'_start)
			drop `var'_start
			// tabulate the variable that still exists (the helper was just dropped;
			// the old -tab `var'_start- only worked through variable abbreviation)
			tab `var'_startyear
	}
	// Fingerprinting is coded in the opposite direction: year the requirement was removed
	bys statefip (year yearmonth): gen no_fingerprint_start = year ///
		if fingerprint == 0 & fingerprint[_n-1] == 1
	bys statefip: egen no_fp_startyear = max(no_fingerprint_start)
	drop no_fingerprint_start
	
	preserve  // save dataset with all policies used in Jones et al. (2022)
		keep statefip year *startyear certeld0106 outreach
		gcollapse (mean) certeld0106 outreach (max) *startyear, by(statefip year)
		save "$for_analysis/snap_policies_jones_et_al", replace
	restore
	
	// *startyear is constant within state, so this leaves one row per state
	keep statefip *startyear
	duplicates drop
	save "$for_analysis/snap_policies", replace  // main controls
end

program code_federal_ssi_amt // for target sample definition: federal max SSI benefits
	// Builds a 17-row table (one row per year, 2000-2016) of the yearly federal
	// SSI benefit for individuals and couples and saves it to
	// $intermediate/federal_SSI_0016.
	// data obtained from https://www.ssa.gov/oact/cola/SSIamts.html in August 2022 

	* monthly SSI benefit amounts, one entry per year from 2000 through 2016
	local indiv_monthly  513 531 545 552 564 579 603 623 637 674 674 674 698 710 721 733 733
	local couple_monthly 769 796 817 829 846 869 904 934 956 1011 1011 1011 1048 1066 1082 1100 1100
	local n_years : word count `indiv_monthly'

	clear
	set obs `n_years'
	gen year = 1999 + _n  // row 1 is 2000, row 17 is 2016

	* create yearly SSI benefit variables (12 x monthly amount)
	gen indiv = .
	gen couple = .
	forval row = 1/`n_years' {
		local amt_indiv  : word `row' of `indiv_monthly'
		local amt_couple : word `row' of `couple_monthly'
		replace indiv  = 12 * `amt_indiv'  in `row'
		replace couple = 12 * `amt_couple' in `row'
	}

	save "$intermediate/federal_SSI_0016", replace
end

program clean_state_covariates
	// Cleans three state-level covariate files and saves each to $intermediate:
	//   state_unemp_0016  (state x year July unemployment, 2000-2016)
	//   state_political   (state-level perc_democrat)
	//   state_income      (state-level hh_income_2003)
	// State names are lower-cased in all three files.

	* ---- unemployment ----
	// data from the BLS, downloaded on July 15, 2023 (re-downloaded May 17, 2025)
	// https://www.bls.gov/web/laus/ststdsadata.txt
	import excel using "$raw/ststdsadata.xlsx", clear firstrow
	ren (Statesand B C D K) (statefip state year month unemp)
	keep statefip state year month unemp
	drop if _n <= 7  // extraneous header rows
	destring statefip year month unemp, replace
	replace state = lower(state)

	// keep July rates for each year, 2000 through 2016
	keep if month == 7 & inrange(year, 2000, 2016)
	drop month
	save "$intermediate/state_unemp_0016", replace
	
	* ---- political data ----
	// Table A-86 from Census Bureau, downloaded on December 30, 2022
	// most recent download: May 17 2025
	// https://www.census.gov/library/publications/2006/compendia/smadb06.html
	import excel using "$raw/TableA-86.xls", clear firstrow
	ren (TableA86 C) (state perc_democrat)
	keep state perc_democrat
	drop if _n <= 6  // extraneous header rows
	keep if _n <= 51  // everything after row 51 is footer material
	drop if state == "District of Columbia"
	// strip a trailing " <digit>" marker from the name, then lower-case it
	replace state = lower(regexr(state, " [0-9]$", ""))
	destring perc_democrat, replace
	save "$intermediate/state_political", replace
	
	* ---- median household income ----
	// Table A-34 from Census Bureau, downloaded December 30, 2022
	// most recent download: May 17 2025
	// https://www.census.gov/library/publications/2006/compendia/smadb06.html
	import excel using "$raw/TableA-34.xls", clear firstrow
	ren (TableA34 B) (state hh_income_2003)
	keep state hh_income_2003
	drop if _n <= 4  // extraneous header rows
	keep if _n <= 51  // everything after row 51 is footer material
	replace state = lower(state)
	destring hh_income_2003, replace
	save "$intermediate/state_income", replace
end

program read_ipums_data
	// Loads the IPUMS extract, restricts to ages 60+, merges in the SNAP policy
	// adoption years, federal SSI maxima and state covariates, and saves
	//   $intermediate/states_raw_0016  (all states)
	//   $intermediate/sc_raw_0016      (treated states only)
	//
	// raw data is extract 11 from https://usa.ipums.org/usa-action/data_requests/download
	// downloaded February 2, 2025
	use "$raw/usa_00011.dta", clear

	* sanity checks on the extract
	assert inrange(year, 2000, 2016)  // inrange() is false for a missing year
	foreach v of varlist perwt age statefip {
		assert !mi(`v')
	}
	gisid sample serial pernum // unique identifier

	// include age 60-65 for less restrictive CAPs and placebo test
	drop if age < 60
	
	* SNAP policy adoption years (state level)
	merge m:1 statefip using "$for_analysis/snap_policies", ///
		assert(2 3) keep(3) nogen
	
	* maximum federal SSI benefit amounts, for target sample definitions
	merge m:1 year using "$intermediate/federal_SSI_0016", assert(3) keep(3) nogen
		
	* state-level covariates, matched on the decoded state name
	decode statefip, gen(state)

	display "merge unemployment"
	merge m:1 state year using "$intermediate/state_unemp_0016", ///
		assert(2 3) keep(3) nogen

	display "merge political data"
	merge m:1 state using "$intermediate/state_political", ///
		assert(1 2 3) keep(1 3) gen(merge_political)

	display "merge median HH income"
	merge m:1 state using "$intermediate/state_income", ///
		assert(1 2 3) keep(1 3) gen(merge_med_inc)

	// the only records allowed to go unmatched are Washington D.C. (statefip 11)
	assert statefip == 11 if inlist(1, merge_political, merge_med_inc)
	drop merge_*
	ren perc_democrat perc_democrat_2004
	ren unemp state_unemp
	
	* dataset for state-level design (both treated and control states)
	save "$intermediate/states_raw_0016", replace
	
	* singles-couples design: treated states only
	assert !mi(statefip)
	keep if inlist(statefip, 4, 12, 21, 22, 24, 25, 26, 28, 34, ///
		35, 36, 37, 42, 45, 46, 48, 51, 53)
	tab statefip
	save "$intermediate/sc_raw_0016", replace
end


* Execute the full pipeline (the programs above are only definitions; nothing runs until this call)
main
